From e0b421d24279d3cce426c0df4bf6d8734c6ce325 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Sat, 24 Feb 2007 14:48:17 +0000 Subject: [PATCH] Reduce impact of saving/restoring/dumping large domains on Dom0 memory usage by means of fadvise64() to tell the OS to discard the cache pages used for the save/dump file. Signed-off-by: Simon Graham --- tools/libxc/Makefile | 3 +++ tools/libxc/xc_core.c | 9 ++++++++ tools/libxc/xc_linux.c | 33 +++++++++++++++++++++++++++++ tools/libxc/xc_linux_restore.c | 18 ++++++++++++++-- tools/libxc/xc_linux_save.c | 38 ++++++++++++++++++++++++++++------ tools/libxc/xc_private.h | 10 +++++++++ tools/libxc/xc_solaris.c | 7 +++++++ 7 files changed, 110 insertions(+), 8 deletions(-) diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile index 25933a9be4..6dc76ae782 100644 --- a/tools/libxc/Makefile +++ b/tools/libxc/Makefile @@ -59,6 +59,9 @@ CFLAGS += -Werror -Wmissing-prototypes CFLAGS += -fno-strict-aliasing CFLAGS += $(INCLUDES) -I. +# Needed for posix_fadvise64() in xc_linux.c +CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE + # Define this to make it possible to run valgrind on code linked with these # libraries. #CFLAGS += -DVALGRIND -O0 -ggdb3 diff --git a/tools/libxc/xc_core.c b/tools/libxc/xc_core.c index 7ea97cd6d0..31c1532b69 100644 --- a/tools/libxc/xc_core.c +++ b/tools/libxc/xc_core.c @@ -802,6 +802,12 @@ static int local_file_dump(void *args, char *buffer, unsigned int length) } } + if (length >= DUMP_INCREMENT*PAGE_SIZE) { + // Now dumping pages -- make sure we discard clean pages from + // the cache after each write + discard_file_cache(da->fd, 0 /* no flush */); + } + return 0; } @@ -822,6 +828,9 @@ xc_domain_dumpcore(int xc_handle, sts = xc_domain_dumpcore_via_callback( xc_handle, domid, &da, &local_file_dump); + /* flush and discard any remaining portion of the file from cache */ + discard_file_cache(da.fd, 1/* flush first*/); + close(da.fd); return sts; diff --git a/tools/libxc/xc_linux.c b/tools/libxc/xc_linux.c index fa4927aafa..4874c042dd 100644 --- a/tools/libxc/xc_linux.c +++ b/tools/libxc/xc_linux.c @@ -328,6 +328,39 @@ int xc_evtchn_unmask(int xce_handle, evtchn_port_t port) return dorw(xce_handle, (char *)&port, sizeof(port), 1); } +/* Optionally flush file to disk and discard page cache */ +int discard_file_cache(int fd, int flush) +{ + off_t cur = 0; + + if ( flush && (fsync(fd) < 0) ) + { + PERROR("Failed to flush file: %s", strerror(errno)); + return -errno; + } + + /* + * Calculate last page boundary of amount written so far + * unless we are flushing in which case entire cache + * is discarded. + */ + if ( !flush ) + { + if ( (cur = lseek(fd, 0, SEEK_CUR)) == (off_t)-1 ) + cur = 0; + cur &= ~(PAGE_SIZE-1); + } + + /* Discard from the buffer cache. */ + if ( posix_fadvise64(fd, 0, cur, POSIX_FADV_DONTNEED) < 0 ) + { + PERROR("Failed to discard cache: %s", strerror(errno)); + return -errno; + } + + return 0; +} + /* * Local variables: * mode: C diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c index 9627d96031..1c7ab23f75 100644 --- a/tools/libxc/xc_linux_restore.c +++ b/tools/libxc/xc_linux_restore.c @@ -144,7 +144,7 @@ int xc_linux_restore(int xc_handle, int io_fd, unsigned int console_evtchn, unsigned long *console_mfn) { DECLARE_DOMCTL; - int rc = 1, i, n, pae_extended_cr3 = 0; + int rc = 1, i, n, m, pae_extended_cr3 = 0; unsigned long mfn, pfn; unsigned int prev_pc, this_pc; int verify = 0; @@ -331,7 +331,7 @@ int xc_linux_restore(int xc_handle, int io_fd, */ prev_pc = 0; - n = 0; + n = m = 0; while (1) { int j, nr_mfns = 0; @@ -530,6 +530,17 @@ int xc_linux_restore(int xc_handle, int io_fd, munmap(region_base, j*PAGE_SIZE); n+= j; /* crude stats */ + + /* + * Discard cache for portion of file read so far up to last + * page boundary every 16MB or so. + */ + m += j; + if ( m > MAX_PAGECACHE_USAGE ) + { + discard_file_cache(io_fd, 0 /* no flush */); + m = 0; + } } /* @@ -864,6 +875,9 @@ int xc_linux_restore(int xc_handle, int io_fd, free(p2m); free(pfn_type); + /* discard cache for save file */ + discard_file_cache(io_fd, 1 /*flush*/); + DPRINTF("Restore exit with rc=%d\n", rc); return rc; diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c index 6b51872ac5..a8f100299d 100644 --- a/tools/libxc/xc_linux_save.c +++ b/tools/libxc/xc_linux_save.c @@ -172,6 +172,28 @@ static uint64_t tv_delta(struct timeval *new, struct timeval *old) (new->tv_usec - old->tv_usec); } +static int noncached_write(int fd, int live, void *buffer, int len) +{ + static int write_count = 0; + + int rc = write(fd,buffer,len); + + if (!live) { + write_count += len; + + if (write_count >= MAX_PAGECACHE_USAGE*PAGE_SIZE) { + int serrno = errno; + + /* Time to discard cache - dont care if this fails */ + discard_file_cache(fd, 0 /* no flush */); + + write_count = 0; + + errno = serrno; + } + } + return rc; +} #ifdef ADAPTIVE_SAVE @@ -205,7 +227,7 @@ static inline void initialize_mbit_rate() } -static int ratewrite(int io_fd, void *buf, int n) +static int ratewrite(int io_fd, int live, void *buf, int n) { static int budget = 0; static int burst_time_us = -1; @@ -215,7 +237,7 @@ static int ratewrite(int io_fd, void *buf, int n) long long delta; if (START_MBIT_RATE == 0) - return write(io_fd, buf, n); + return noncached_write(io_fd, live, buf, n); budget -= n; if (budget < 0) { @@ -251,13 +273,13 @@ static int ratewrite(int io_fd, void *buf, int n) } } } - return write(io_fd, buf, n); + return noncached_write(io_fd, live, buf, n); } #else /* ! ADAPTIVE SAVE */ #define RATE_IS_MAX() (0) -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n)) +#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n)) #define initialize_mbit_rate() #endif @@ -1082,7 +1104,7 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, if(race && !live) goto out; - if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) { + if (ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE) { ERROR("Error when writing to state file (4)" " (errno %d)", errno); goto out; @@ -1091,7 +1113,7 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, } else { /* We have a normal page: just write it directly. */ - if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) { + if (ratewrite(io_fd, live, spage, PAGE_SIZE) != PAGE_SIZE) { ERROR("Error when writing to state file (5)" " (errno %d)", errno); goto out; @@ -1261,6 +1283,10 @@ int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, DPRINTF("Warning - couldn't disable shadow mode"); } } + else { + // flush last write and discard cache for file + discard_file_cache(io_fd, 1 /* flush */); + } if (live_shinfo) munmap(live_shinfo, PAGE_SIZE); diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h index 7705063326..13935d361b 100644 --- a/tools/libxc/xc_private.h +++ b/tools/libxc/xc_private.h @@ -41,6 +41,13 @@ #define INFO 1 #define PROGRESS 0 +/* +** Define max dirty page cache to permit during save/restore -- need to balance +** keeping cache usage down with CPU impact of invalidating too often. +** (Currently 16MB) +*/ +#define MAX_PAGECACHE_USAGE (4*1024) + #if INFO #define IPRINTF(_f, _a...) printf(_f , ## _a) #else @@ -158,4 +165,7 @@ int xc_waitdomain_core(int xc_handle, int domain, int *status, void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits); void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits); +/* Optionally flush file to disk and discard page cache */ +int discard_file_cache(int fd, int flush); + #endif /* __XC_PRIVATE_H__ */ diff --git a/tools/libxc/xc_solaris.c b/tools/libxc/xc_solaris.c index 889db6396d..4c537291e8 100644 --- a/tools/libxc/xc_solaris.c +++ b/tools/libxc/xc_solaris.c @@ -242,3 +242,10 @@ int xc_evtchn_unmask(int xce_handle, evtchn_port_t port) { return dorw(xce_handle, (char *)&port, sizeof(port), 1); } + +/* Optionally flush file to disk and discard page cache */ +int discard_file_cache(int fd, int flush) +{ + // TODO: Implement for Solaris! + return 0; +} -- 2.30.2